here::set_here()
## File .here already exists in /Users/koenvandenberge/Dropbox/research/brainStat/hbcRegenGithub/scripts
suppressPackageStartupMessages({
  library(slingshot)
  library(tradeSeq)
  library(SingleCellExperiment)
  library(cowplot)
  library(rgl)
  library(clusterExperiment)
  library(RColorBrewer)
  library(aggregation)
  library(ggplot2)
  library(pheatmap)
  library(wesanderson)
  library(UpSetR)
  library(gridExtra)
  library(NMF)
  library(nichenetr)
  library(Seurat)
  library(dplyr)
  library(tidyverse)
  library(circlize)
  library(ComplexHeatmap)
  library(NMF)
  library(msigdbr)
  library(fgsea)
})
colby <- function(values, g=4){
  if(is(values, "character")){
    cols <- as.numeric(as.factor(values))
    return(cols)
  }
  if(is(values, "factor")){
    ncl <- nlevels(values)
    if(ncl > 9){
          colpal <- c(RColorBrewer::brewer.pal(9, 'Set1'), wesanderson::wes_palette("Darjeeling1", n=ncl-9, type="continuous"))
    } else {
       colpal <- RColorBrewer::brewer.pal(9, 'Set1')
    }
    cols <- colpal[as.numeric(values)]
    return(cols)
  }
  if(is(values, "numeric")){
    pal <- wesanderson::wes_palette("Zissou1", n=g, type="continuous")
    gg <- Hmisc::cut2(values, g=g)
    if(nlevels(gg) < g){
      nl <- nlevels(gg)
      if(nl == 2) pal <- pal[c(1,g)]
    }
    cols <- pal[gg]
    return(cols)
  }
}

Import data

sds <- readRDS("../data/finalTrajectory/sling.rds")
counts <- readRDS("../data/finalTrajectory/counts_noResp_noMV.rds")
counts <- round(counts)
sce <- readRDS("../data/finalTrajectory/sce_tradeSeq20200904.rds")
load("../data/ALL_TF.Rda")
clDatta <- readRDS("../data/finalTrajectory/dattaCl_noResp_noMV.rds")

Trajectory

plot3d(reducedDim(sds), aspect = 'iso', col=brewer.pal(9,'Set1')[clDatta], alpha=.6)
plot3d.SlingshotDataSet(sds, add=TRUE, col=c("black"), lwd=4)

Nichenet analysis

library(nichenetr)
library(tidyverse)
ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds"))
lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds"))

ligand_target_matrix_original <- ligand_target_matrix
lr_network_original <- lr_network

Load ligand-target interaction matrix

# convert human to mouse
ligand_target_matrix <- ligand_target_matrix_original
colnames(ligand_target_matrix) = ligand_target_matrix %>% colnames() %>% convert_human_to_mouse_symbols()
rownames(ligand_target_matrix) = ligand_target_matrix %>% rownames() %>% convert_human_to_mouse_symbols()

Define expressed genes in sender and receiver population

Sender population is regenerated HBC. Receiver population is activated HBC. We define an expressed gene is a gene being expressed (non-zero) in at least 10% of the cells in that cell type.

# expressed_genes_sender <- rownames(counts)[rowMeans(counts[, clDatta == "HBC"] > 0) > 0.05]
# expressed_genes_receiver <- rownames(counts)[rowMeans(counts[, clDatta == "HBC*"] > 0) > 0.05]
# length(expressed_genes_sender)
# length(expressed_genes_receiver)
expressed_genes_sender <- expressed_genes_receiver <- rownames(counts)

Nichenet analysis: HBC vs HBC* gene set of interest

Define gene set of interest through differential expression and background genes

clDatta <- droplevels(clDatta)
clDatta <- relevel(clDatta, ref="HBC*")
library(glmGamPoi)
# fit <- glm_gp(counts, 
#               design = model.matrix(~ clDatta))
fit <- readRDS("~/tmp/glmGamFit.rds")
deRes <- test_de(fit, contrast = "clDattaHBC")
rownames(deRes) <- deRes$name
deGenes <- deRes$name[deRes$adj_pval <= 0.01]
deGenesOrd <- order(abs(deRes[deGenes, "lfc"]), decreasing=TRUE)
geneset_oi <- deGenes[1:500]

background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)]

Define potential ligands

lr_network <- lr_network_original
lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na()

ligands = lr_network %>% pull(from) %>% unique()
expressed_ligands = intersect(ligands,expressed_genes_sender)
receptors = lr_network %>% pull(to) %>% unique()
expressed_receptors = intersect(receptors,expressed_genes_receiver)

lr_network_expressed = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) 
head(lr_network_expressed)
## # A tibble: 6 x 4
##   from   to     source         database
##   <chr>  <chr>  <chr>          <chr>   
## 1 Cxcl1  Cxcr2  kegg_cytokines kegg    
## 2 Cxcl2  Cxcr2  kegg_cytokines kegg    
## 3 Cxcl5  Cxcr2  kegg_cytokines kegg    
## 4 Ppbp   Cxcr2  kegg_cytokines kegg    
## 5 Cxcl12 Cxcr4  kegg_cytokines kegg    
## 6 Cx3cl1 Cx3cr1 kegg_cytokines kegg
potential_ligands = lr_network_expressed %>% pull(from) %>% unique()
head(potential_ligands)
## [1] "Cxcl1"  "Cxcl2"  "Cxcl5"  "Ppbp"   "Cxcl12" "Cx3cl1"

Ligand activity analysis

ligand_activities = predict_ligand_activities(geneset = geneset_oi,
                                              background_expressed_genes = background_expressed_genes, 
                                              ligand_target_matrix = ligand_target_matrix, 
                                              potential_ligands = potential_ligands)

ligand_activities_hbcAct_hbc <- ligand_activities

ligand_activities %>% arrange(-pearson)
## # A tibble: 366 x 4
##    test_ligand auroc   aupr pearson
##    <chr>       <dbl>  <dbl>   <dbl>
##  1 Agrn        0.594 0.0453  0.0541
##  2 Hspg2       0.580 0.0418  0.0420
##  3 Nrg1        0.549 0.0421  0.0408
##  4 Lamb1       0.575 0.0438  0.0399
##  5 Ctf1        0.549 0.0417  0.0390
##  6 Il23a       0.554 0.0436  0.0383
##  7 Grn         0.563 0.0424  0.0372
##  8 Lamb2       0.560 0.0416  0.0371
##  9 Il24        0.558 0.0416  0.0365
## 10 Ptprt       0.558 0.0415  0.0359
## # … with 356 more rows
best_upstream_ligands = ligand_activities %>% top_n(20, pearson) %>% arrange(-pearson) %>% pull(test_ligand)

# Top 20 of most active ligands:
best_upstream_ligands
##  [1] "Agrn"   "Hspg2"  "Nrg1"   "Lamb1"  "Ctf1"   "Il23a"  "Grn"    "Lamb2" 
##  [9] "Il24"   "Ptprt"  "Jag2"   "Tdgf1"  "Camp"   "Dkk1"   "Tslp"   "Dscam" 
## [17] "Wnt10a" "Inhba"  "Inhbb"  "Pvr"
# threshold
p_hist_lig_activity = ggplot(ligand_activities, aes(x=pearson)) + 
  geom_histogram(color="black", fill="darkorange")  + 
  # geom_density(alpha=.1, fill="orange") +
  geom_vline(aes(xintercept=min(ligand_activities %>% top_n(20, pearson) %>% pull(pearson))), color="red", linetype="dashed", size=1) + 
  labs(x="ligand activity (PCC)", y = "# ligands") +
  theme_classic()
p_hist_lig_activity
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Target genes of top ligands

active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows()
active_ligand_target_links_df <- active_ligand_target_links_df[!is.na(active_ligand_target_links_df$target),]

active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = 0.25)
order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev()
order_targets = intersect(active_ligand_target_links_df$target, rownames(active_ligand_target_links)) %>% unique()
vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t()


p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized HBC-ligands","HBC* target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + scale_fill_gradient2(low = "whitesmoke",  high = "purple", breaks = c(0,0.005,0.01)) + theme(axis.text.x = element_text(face = "italic"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
color <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100)
breaks <- seq(0, 0.01, length=100)
breaks <- c(breaks, 0.05)
annoRowDf <- data.frame(meanExp = log1p(rowMeans(counts[rownames(vis_ligand_target), clDatta=="HBC"])))
rownames(annoRowDf) <- rownames(vis_ligand_target)
pheatmap::pheatmap(vis_ligand_target, cluster_rows=FALSE, cluster_cols=FALSE,
         col=color, breaks=breaks, border_color = NA, 
         annotation_row = annoRowDf)

Circos plot

current_plot_df <- vis_ligand_target
active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows()

cutoff_include_all_ligands = active_ligand_target_links_df$weight %>% quantile(0.66, na.rm=TRUE)
active_ligand_target_links_df_circos = active_ligand_target_links_df %>% filter(weight > cutoff_include_all_ligands)
ligands_to_remove = setdiff(active_ligand_target_links_df$ligand %>% unique(), active_ligand_target_links_df_circos$ligand %>% unique())
targets_to_remove = setdiff(active_ligand_target_links_df$target %>% unique(), active_ligand_target_links_df_circos$target %>% unique())
circos_links = active_ligand_target_links_df %>% filter(!target %in% targets_to_remove &!ligand %in% ligands_to_remove)
logfc = deRes$lfc
logfc_filter = deRes$name[abs(logfc) >= 2]
circos_links = circos_links %>% filter(target %in% logfc_filter)
# logfc_sender = deRes$lfc[deRes$name %in% rownames(current_plot_df)]
# circos_links = circos_links %>% arrange(logfc[circos_links$target])
#     

cdm_res = chordDiagram(circos_links, annotationTrack = "grid", preAllocateTracks = 1)
abs_max = max(logfc, na.rm = TRUE)
    # https://jokergoo.github.io/circlize_book/book/legends.html#legends
    # https://jokergoo.github.io/circlize_book/book/a-complex-example-of-chord-diagram.html
col_fun = colorRamp2(c(-abs_max, 0, abs_max), c("dodgerblue4", "white", "red"))
col_fun2 = colorRamp2(c(0, max(ligand_activities[, "pearson"])), c("white", "green4"))
lgd_links = Legend(at = c(-4, -2, 0, 2, 4), col_fun = col_fun, title_position = "topleft", title = "LogFC")
lgd_act = Legend(at = c(0, ceiling(100*max(ligand_activities[, "pearson"])))/100, col_fun = col_fun2, title_position = "topleft", title = "Ligand Activity")
lgd_list_vertical = packLegend(lgd_links, lgd_act)

ylim = get.cell.meta.data("ylim", sector.index = circos_links$ligand[1], track.index = 1)
y1 = ylim[2] - (ylim[2] - ylim[1])*0.9
y2 = ylim[2] - (ylim[2] - ylim[1])*0.75
y3 = ylim[2] - (ylim[2] - ylim[1])*0.74
y4 = ylim[2] - (ylim[2] - ylim[1])*0.59
    
for(i in seq_len(nrow(cdm_res))) {
  if(cdm_res$value1[i] > 0) {
      circos.rect(
          cdm_res[i, "x1"], y1, 
          cdm_res[i, "x1"] - abs(cdm_res[i, "value1"]), y1 + (y2-y1)*0.7,
          col = col_fun(min(5, max(-5, logfc[cdm_res$rn[i]]))),
          border = col_fun(min(5, max(-5, logfc[cdm_res$rn[i]]))),
          sector.index = cdm_res$rn[i], track.index = 1)
      circos.rect(
          cdm_res[i, "x1"], y3 + (y4-y3)*0.3, 
          cdm_res[i, "x1"] - abs(cdm_res[i, "value1"]), y4,
          col = col_fun2(ligand_activities %>% filter(test_ligand == cdm_res$rn[i]) %>% .$pearson),
          border = col_fun2(ligand_activities %>% filter(test_ligand == cdm_res$rn[i]) %>% .$pearson),
          sector.index = cdm_res$rn[i], track.index = 1)
      circos.rect(
          cdm_res[i, "x2"], y1 + (y2-y1)*0.3, 
          cdm_res[i, "x2"] - abs(cdm_res[i, "value1"]), y2,
          col = col_fun(min(5, max(-5, logfc[cdm_res$cn[i]]))),
          border = col_fun(min(5, max(-5, logfc[cdm_res$cn[i]]))),
          sector.index = cdm_res$cn[i], track.index = 1)
        }
    }
    circos.trackPlotRegion(track.index = 1, panel.fun = function(x, y) {
      xlim = get.cell.meta.data("xlim")
      sector.name = get.cell.meta.data("sector.index")
      circos.text(mean(xlim), y4 + .1, sector.name, facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5), cex = 0.5)
    }, bg.border = NA)
    draw(lgd_list_vertical, x = unit(0.95, "npc"), y = unit(0.85, "npc"), just = c("top", "right"))

    circos.clear()

Nichenet analysis: HBC* vs all

Define gene set of interest through differential expression and background genes

clDatta <- droplevels(clDatta)
clDatta <- relevel(clDatta, ref="HBC*")
library(glmGamPoi)
# fit <- glm_gp(counts, 
#               design = model.matrix(~ clDatta))
L <- matrix(0, nrow=ncol(fit$Beta), ncol=1)
rownames(L) <- colnames(fit$Beta)
L[,1] <- 1/5
deRes <- test_de(fit, 
                 contrast = L)
rownames(deRes) <- deRes$name
# deGenes <- deRes$name[deRes$adj_pval <= 0.01 & abs(deRes$lfc) > log2(2)]
deGenes <- deRes$name[deRes$adj_pval <= 0.01]
deGenesOrd <- order(abs(deRes[deGenes, "lfc"]), decreasing=TRUE)
length(deGenes)
## [1] 11429
geneset_oi <- deGenes[deGenesOrd[1:500]]

background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)]

Define potential ligands

lr_network <- lr_network_original
lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na()

ligands = lr_network %>% pull(from) %>% unique()
expressed_ligands = intersect(ligands,expressed_genes_sender)
receptors = lr_network %>% pull(to) %>% unique()
expressed_receptors = intersect(receptors,expressed_genes_receiver)

lr_network_expressed = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) 
head(lr_network_expressed)
## # A tibble: 6 x 4
##   from   to     source         database
##   <chr>  <chr>  <chr>          <chr>   
## 1 Cxcl1  Cxcr2  kegg_cytokines kegg    
## 2 Cxcl2  Cxcr2  kegg_cytokines kegg    
## 3 Cxcl5  Cxcr2  kegg_cytokines kegg    
## 4 Ppbp   Cxcr2  kegg_cytokines kegg    
## 5 Cxcl12 Cxcr4  kegg_cytokines kegg    
## 6 Cx3cl1 Cx3cr1 kegg_cytokines kegg
potential_ligands = lr_network_expressed %>% pull(from) %>% unique()
head(potential_ligands)
## [1] "Cxcl1"  "Cxcl2"  "Cxcl5"  "Ppbp"   "Cxcl12" "Cx3cl1"

Ligand activity analysis

ligand_activities = predict_ligand_activities(geneset = geneset_oi,
                                              background_expressed_genes = background_expressed_genes, 
                                              ligand_target_matrix = ligand_target_matrix, 
                                              potential_ligands = potential_ligands)

ligand_activities_hbcAct_all <- ligand_activities


ligand_activities %>% arrange(-pearson)
## # A tibble: 366 x 4
##    test_ligand auroc   aupr pearson
##    <chr>       <dbl>  <dbl>   <dbl>
##  1 Tnf         0.507 0.0527  0.0837
##  2 Tgfb1       0.494 0.0501  0.0602
##  3 Il1b        0.500 0.0446  0.0590
##  4 Apoe        0.472 0.0397  0.0492
##  5 Ccl12       0.485 0.0465  0.0478
##  6 Has2        0.521 0.0429  0.0393
##  7 Il23a       0.516 0.0470  0.0384
##  8 Il18        0.513 0.0447  0.0382
##  9 Tgfb2       0.502 0.0480  0.0358
## 10 Il15        0.508 0.0411  0.0357
## # … with 356 more rows
best_upstream_ligands = ligand_activities %>% top_n(15, pearson) %>% arrange(-pearson) %>% pull(test_ligand)

# Top 20 of most active ligands:
best_upstream_ligands
##  [1] "Tnf"    "Tgfb1"  "Il1b"   "Apoe"   "Ccl12"  "Has2"   "Il23a"  "Il18"  
##  [9] "Tgfb2"  "Il15"   "Mpz"    "App"    "Adam17" "Csf2"   "Il10"
# threshold
p_hist_lig_activity = ggplot(ligand_activities, aes(x=pearson)) + 
  geom_histogram(color="black", fill="darkorange")  + 
  # geom_density(alpha=.1, fill="orange") +
  geom_vline(aes(xintercept=min(ligand_activities %>% top_n(20, pearson) %>% pull(pearson))), color="red", linetype="dashed", size=1) + 
  labs(x="ligand activity (PCC)", y = "# ligands") +
  theme_classic()
p_hist_lig_activity
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Target genes of top ligands

active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows()
active_ligand_target_links_df <- active_ligand_target_links_df[!is.na(active_ligand_target_links_df$target),]

active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = 0.25)
order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev()
order_targets = intersect(active_ligand_target_links_df$target, rownames(active_ligand_target_links)) %>% unique()
vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t()

p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized ligands","HBC* target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + scale_fill_gradient2(low = "whitesmoke",  high = "purple", breaks = c(0,0.005,0.01)) + theme(axis.text.x = element_text(face = "italic"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
color <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100)
breaks <- seq(0, 0.01, length=100)
breaks <- c(breaks, 0.05)
annoRowDf <- data.frame(meanExp = log1p(rowMeans(counts[rownames(vis_ligand_target), clDatta=="HBC"])))
rownames(annoRowDf) <- rownames(vis_ligand_target)
pheatmap::pheatmap(vis_ligand_target, cluster_rows=FALSE, cluster_cols=FALSE,
         col=color, breaks=breaks, border_color = NA, 
         annotation_row = annoRowDf)

Circos plot

current_plot_df <- vis_ligand_target
active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows()

cutoff_include_all_ligands = active_ligand_target_links_df$weight %>% quantile(0.66, na.rm=TRUE)
active_ligand_target_links_df_circos = active_ligand_target_links_df %>% filter(weight > cutoff_include_all_ligands)
ligands_to_remove = setdiff(active_ligand_target_links_df$ligand %>% unique(), active_ligand_target_links_df_circos$ligand %>% unique())
targets_to_remove = setdiff(active_ligand_target_links_df$target %>% unique(), active_ligand_target_links_df_circos$target %>% unique())
circos_links = active_ligand_target_links_df %>% filter(!target %in% targets_to_remove &!ligand %in% ligands_to_remove)
logfc = deRes$lfc
logfc_filter = deRes$name[abs(logfc) >= 2]
circos_links = circos_links %>% filter(target %in% logfc_filter)
# logfc_sender = deRes$lfc[deRes$name %in% rownames(current_plot_df)]
# circos_links = circos_links %>% arrange(logfc[circos_links$target])
#     

cdm_res = chordDiagram(circos_links, annotationTrack = "grid", preAllocateTracks = 1)
## Note: The second link end is drawn out of sector 'Bcl2a1a'.
## Note: The second link end is drawn out of sector 'Il23a'.
abs_max = max(logfc, na.rm = TRUE)
    # https://jokergoo.github.io/circlize_book/book/legends.html#legends
    # https://jokergoo.github.io/circlize_book/book/a-complex-example-of-chord-diagram.html
col_fun = colorRamp2(c(-abs_max, 0, abs_max), c("dodgerblue4", "white", "red"))
col_fun2 = colorRamp2(c(0, max(ligand_activities[, "pearson"])), c("white", "green4"))
lgd_links = Legend(at = c(-4, -2, 0, 2, 4), col_fun = col_fun, title_position = "topleft", title = "LogFC")
lgd_act = Legend(at = c(0, ceiling(100*max(ligand_activities[, "pearson"])))/100, col_fun = col_fun2, title_position = "topleft", title = "Ligand Activity")
lgd_list_vertical = packLegend(lgd_links, lgd_act)

ylim = get.cell.meta.data("ylim", sector.index = circos_links$ligand[1], track.index = 1)
y1 = ylim[2] - (ylim[2] - ylim[1])*0.9
y2 = ylim[2] - (ylim[2] - ylim[1])*0.75
y3 = ylim[2] - (ylim[2] - ylim[1])*0.74
y4 = ylim[2] - (ylim[2] - ylim[1])*0.59
    
for(i in seq_len(nrow(cdm_res))) {
  if(cdm_res$value1[i] > 0) {
      circos.rect(
          cdm_res[i, "x1"], y1, 
          cdm_res[i, "x1"] - abs(cdm_res[i, "value1"]), y1 + (y2-y1)*0.7,
          col = col_fun(min(5, max(-5, logfc[cdm_res$rn[i]]))),
          border = col_fun(min(5, max(-5, logfc[cdm_res$rn[i]]))),
          sector.index = cdm_res$rn[i], track.index = 1)
      circos.rect(
          cdm_res[i, "x1"], y3 + (y4-y3)*0.3, 
          cdm_res[i, "x1"] - abs(cdm_res[i, "value1"]), y4,
          col = col_fun2(ligand_activities %>% filter(test_ligand == cdm_res$rn[i]) %>% .$pearson),
          border = col_fun2(ligand_activities %>% filter(test_ligand == cdm_res$rn[i]) %>% .$pearson),
          sector.index = cdm_res$rn[i], track.index = 1)
      circos.rect(
          cdm_res[i, "x2"], y1 + (y2-y1)*0.3, 
          cdm_res[i, "x2"] - abs(cdm_res[i, "value1"]), y2,
          col = col_fun(min(5, max(-5, logfc[cdm_res$cn[i]]))),
          border = col_fun(min(5, max(-5, logfc[cdm_res$cn[i]]))),
          sector.index = cdm_res$cn[i], track.index = 1)
        }
    }
## Note: 3 points are out of plotting region in sector 'Bcl2a1a', track
## '1'.
## Note: 3 points are out of plotting region in sector 'Il23a', track '1'.
    circos.trackPlotRegion(track.index = 1, panel.fun = function(x, y) {
      xlim = get.cell.meta.data("xlim")
      sector.name = get.cell.meta.data("sector.index")
      circos.text(mean(xlim), y4 + .1, sector.name, facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5), cex = 0.5)
    }, bg.border = NA)
    draw(lgd_list_vertical, x = unit(0.95, "npc"), y = unit(0.85, "npc"), just = c("top", "right"))

    circos.clear()

Random input in terms of DE genes

prepare_ligand_target_visualization_kvdb <- function(ligand_target_df,
                                                     ligand_target_matrix, 
                                                     cutoff = 0.25) 
{
    cutoff_include_all_ligands = ligand_target_df$weight %>% 
        quantile(cutoff, na.rm=TRUE)
    ligand_target_matrix_oi = ligand_target_matrix
    ligand_target_matrix_oi[ligand_target_matrix_oi < cutoff_include_all_ligands] = 0
    
    ligand_target_vis = ligand_target_matrix_oi[ligand_target_df$target %>% 
        unique(), ligand_target_df$ligand %>% unique()]
    dim(ligand_target_vis) = c(length(ligand_target_df$target %>% 
        unique()), length(ligand_target_df$ligand %>% unique()))
    all_targets = ligand_target_df$target %>% unique()
    all_ligands = ligand_target_df$ligand %>% unique()
    rownames(ligand_target_vis) = all_targets
    colnames(ligand_target_vis) = all_ligands
    keep_targets = all_targets[ligand_target_vis %>% apply(1, 
        sum) > 0]
    keep_ligands = all_ligands[ligand_target_vis %>% apply(2, 
        sum) > 0]
    ligand_target_vis_filtered = ligand_target_vis[keep_targets, 
        keep_ligands]
    if (is.matrix(ligand_target_vis_filtered)) {
        rownames(ligand_target_vis_filtered) = keep_targets
        colnames(ligand_target_vis_filtered) = keep_ligands
    }
    else {
        dim(ligand_target_vis_filtered) = c(length(keep_targets), 
            length(keep_ligands))
        rownames(ligand_target_vis_filtered) = keep_targets
        colnames(ligand_target_vis_filtered) = keep_ligands
    }
    if (nrow(ligand_target_vis_filtered) > 1 & ncol(ligand_target_vis_filtered) > 
        1) {
        distoi = dist(1 - cor(t(ligand_target_vis_filtered)))
        hclust_obj = hclust(distoi, method = "ward.D2")
        order_targets = hclust_obj$labels[hclust_obj$order]
        distoi_targets = dist(1 - cor(ligand_target_vis_filtered))
        hclust_obj = hclust(distoi_targets, method = "ward.D2")
        order_ligands = hclust_obj$labels[hclust_obj$order]
    }
    else {
        order_targets = rownames(ligand_target_vis_filtered)
        order_ligands = colnames(ligand_target_vis_filtered)
    }
    vis_ligand_target_network = ligand_target_vis_filtered[order_targets, 
        order_ligands]
    dim(vis_ligand_target_network) = c(length(order_targets), 
        length(order_ligands))
    rownames(vis_ligand_target_network) = order_targets
    colnames(vis_ligand_target_network) = order_ligands
    return(vis_ligand_target_network)
}

runNicheNet <- function(genesOfInterest, nTopLigands,
                        expressed_genes_receiver,
                        expressed_genes_sender,
                        lr_network_original = lr_network_original){
  
  
  geneset_oi <- genesOfInterest
  background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)]
  
  lr_network <- lr_network_original
  lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na()
  
  ligands = lr_network %>% pull(from) %>% unique()
  expressed_ligands = intersect(ligands,expressed_genes_sender)
  receptors = lr_network %>% pull(to) %>% unique()
  expressed_receptors = intersect(receptors,expressed_genes_receiver)
  
  lr_network_expressed = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) 
  head(lr_network_expressed)
  
  potential_ligands = lr_network_expressed %>% pull(from) %>% unique()
  head(potential_ligands)
  
  ligand_activities = predict_ligand_activities(geneset = geneset_oi,
                                              background_expressed_genes = background_expressed_genes, 
                                              ligand_target_matrix = ligand_target_matrix, 
                                              potential_ligands = potential_ligands)

  ligand_activities %>% arrange(-pearson)
  best_upstream_ligands = ligand_activities %>% top_n(nTopLigands, pearson) %>% arrange(-pearson) %>% pull(test_ligand)
  
  # Top 20 of most active ligands:
  best_upstream_ligands
  
  # threshold
  p_hist_lig_activity = ggplot(ligand_activities, aes(x=pearson)) + 
    geom_histogram(color="black", fill="darkorange")  + 
    # geom_density(alpha=.1, fill="orange") +
    geom_vline(aes(xintercept=min(ligand_activities %>% top_n(nTopLigands, pearson) %>% pull(pearson))), color="red", linetype="dashed", size=1) + 
    labs(x="ligand activity (PCC)", y = "# ligands") +
    theme_classic()
  p_hist_lig_activity
  
  ### changed
      # active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows()

  
    active_ligand_target_links_df = best_upstream_ligands %>%
      lapply(get_weighted_ligand_target_links,
             geneset = geneset_oi, 
             ligand_target_matrix = ligand_target_matrix, 
             n = 250) %>% 
      bind_rows()
    # added:
    active_ligand_target_links_df <- active_ligand_target_links_df[!is.na(active_ligand_target_links_df$target),]
  
  active_ligand_target_links <- prepare_ligand_target_visualization_kvdb(
    ligand_target_df = active_ligand_target_links_df, 
    ligand_target_matrix = ligand_target_matrix, 
    cutoff = 0.25)
  order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev()
  order_targets = intersect(active_ligand_target_links_df$target, rownames(active_ligand_target_links)) %>% unique()
  vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t()
  
  
  p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized ligands","HBC* target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + scale_fill_gradient2(low = "whitesmoke",  high = "purple", breaks = c(0,0.005,0.01)) + theme(axis.text.x = element_text(face = "italic"))
  
  
  color <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100)
  breaks <- seq(0, 0.01, length=100)
  breaks <- c(breaks, 0.05)
  annoRowDf <- data.frame(meanExp = log1p(rowMeans(counts[rownames(vis_ligand_target), clDatta=="HBC"])))
  rownames(annoRowDf) <- rownames(vis_ligand_target)
  ph <- pheatmap::pheatmap(vis_ligand_target, cluster_rows=FALSE, cluster_cols=FALSE,
         col=color, breaks=breaks, border_color = NA, 
         annotation_row = annoRowDf)
  return(ph)
}

5000 genes as input

length(deGenes) ## 5623
phList <- list()
for(kk in 1:5){
  set.seed(kk)
  genes <- sample(rownames(counts), 5000)
  phList[[kk]] <- runNicheNet(genesOfInterest=genes, nTopLigands=20,
                        expressed_genes_receiver,
                        expressed_genes_sender,
                        lr_network_original)
  print(phList[[kk]])
}

2000 genes as input

phList2k <- list()
for(kk in 1:3){
  set.seed(kk)
  genes <- sample(rownames(counts), 2000)
  phList2k[[kk]] <- runNicheNet(genesOfInterest=genes, nTopLigands=20,
                        expressed_genes_receiver,
                        expressed_genes_sender,
                        lr_network_original)
  print(phList2k[[kk]])
}

200 genes as input

phList2k <- list()
for(kk in 1:6){
  set.seed(kk)
  genes <- sample(rownames(counts), 200)
  phList2k[[kk]] <- runNicheNet(genesOfInterest=genes, nTopLigands=15,
                        expressed_genes_receiver,
                        expressed_genes_sender,
                        lr_network_original)
  print(phList2k[[kk]])
}

Null distribution for each ligand

runNicheNetPC <- function(genesOfInterest, nTopLigands,
                        expressed_genes_receiver,
                        expressed_genes_sender,
                        lr_network = lr_network_original){
  
  
  geneset_oi <- genesOfInterest
  background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)]
  
  lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na()
  
  ligands = lr_network %>% pull(from) %>% unique()
  expressed_ligands = intersect(ligands,expressed_genes_sender)
  receptors = lr_network %>% pull(to) %>% unique()
  expressed_receptors = intersect(receptors,expressed_genes_receiver)
  
  lr_network_expressed = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) 

  potential_ligands = lr_network_expressed %>% pull(from) %>% unique()

  ligand_activities = predict_ligand_activities(geneset = geneset_oi,
                                              background_expressed_genes = background_expressed_genes, 
                                              ligand_target_matrix = ligand_target_matrix, 
                                              potential_ligands = potential_ligands)

  return(ligand_activities)
}


resList <- list()
N <- 100
genes <- sample(rownames(counts), 200)
hlp <- runNicheNetPC(genesOfInterest=genes, 
                                 nTopLigands=15,
                                  rownames(counts),
                                  rownames(counts),
                                  lr_network = lr_networkOrig)
ligandActivityNullMat <- matrix(NA, nrow=nrow(hlp), ncol=N)
rownames(ligandActivityNullMat) <- sort(hlp$test_ligand)
for(kk in 1:N){
  set.seed(kk)
  message(kk)
  genes <- sample(rownames(counts), 200)
  res <- runNicheNetPC(genesOfInterest=genes, 
                                 nTopLigands=15,
                                  rownames(counts),
                                  rownames(counts),
                                  lr_network = lr_networkOrig)
  ligandActivityNullMat[res$test_ligand, kk] <- res$pearson
}
saveRDS(ligandActivityNullMat, file = "../data/ligandActivityNullMat.rds")

Calculate evidence as is

ligandActivityNullMat <- readRDS("../data/ligandActivityNullMat.rds")

rafalib::mypar(mfrow=c(3,3))
pvalHBC <- vector(length = nrow(ligandActivityNullMat))
pvalAll <- vector(length = nrow(ligandActivityNullMat))
pccHBC <- pvalHBC
pccAll <- pvalAll
names(pvalHBC) <- names(pvalAll) <- rownames(ligandActivityNullMat)
for(kk in 1:nrow(ligandActivityNullMat)){
  curLigand <- rownames(ligandActivityNullMat)[kk]
  pccHBC[kk] <- ligand_activities_hbcAct_hbc$pearson[ligand_activities_hbcAct_all$test_ligand == curLigand]
  pccAll[kk] <- ligand_activities_hbcAct_all$pearson[ligand_activities_hbcAct_all$test_ligand == curLigand]
  hist(ligandActivityNullMat[kk,], breaks=40, xlim=c(min(ligandActivityNullMat), max(ligandActivityNullMat)), xlab="PCC", main=curLigand)
  abline(v=pccHBC[kk], col="red", lwd=2)
  abline(v=pccAll[kk], col="blue", lwd=2)
  pvalHBC[kk] <- mean(ligandActivityNullMat[curLigand,] > pccHBC[kk])
  pvalAll[kk] <- mean(ligandActivityNullMat[curLigand,] > pccAll[kk])
}

rafalib::mypar(mfrow=c(1,2))

plot(pccHBC, pccAll, pch=16, cex=2/3)
plot(x=rowMeans(cbind(pccHBC, pccAll)),
     y=pccHBC - pccAll, 
     pch=16, cex=2/3) ; abline(v=0, lty=2, col="red")

# there is a big difference in p-value because there is a general mean shift
# in the PCC between both analyses...
hist(pvalAll)
hist(pvalHBC)

Calculate evidence after centering PCC distributions

ligandActivityNullMat <- readRDS("../data/ligandActivityNullMat.rds")


ligand_activities_hbcAct_hbc$pearsonCentered <- ligand_activities_hbcAct_hbc$pearson - mean(ligand_activities_hbcAct_hbc$pearson)
ligand_activities_hbcAct_all$pearsonCentered <- ligand_activities_hbcAct_all$pearson - mean(ligand_activities_hbcAct_all$pearson)

rafalib::mypar(mfrow=c(3,3))
pvalCenteredHBC <- vector(length = nrow(ligandActivityNullMat))
pvalCenteredAll <- vector(length = nrow(ligandActivityNullMat))
pccCenteredHBC <- pvalHBC
pccCenteredAll <- pvalAll
names(pvalHBC) <- names(pvalAll) <- rownames(ligandActivityNullMat)
for(kk in 1:nrow(ligandActivityNullMat)){
  curLigand <- rownames(ligandActivityNullMat)[kk]
  pccCenteredHBC[kk] <- ligand_activities_hbcAct_hbc$pearsonCentered[ligand_activities_hbcAct_all$test_ligand == curLigand]
  pccCenteredAll[kk] <- ligand_activities_hbcAct_all$pearsonCentered[ligand_activities_hbcAct_all$test_ligand == curLigand]
  hist(ligandActivityNullMat[kk,], breaks=40, xlim=c(min(ligandActivityNullMat), max(ligandActivityNullMat)), xlab="PCC", main=curLigand)
  abline(v=pccCenteredHBC[kk], col="red", lwd=2)
  abline(v=pccCenteredAll[kk], col="blue", lwd=2)
  pvalCenteredHBC[kk] <- mean(ligandActivityNullMat[curLigand,] > pccCenteredHBC[kk])
  pvalCenteredAll[kk] <- mean(ligandActivityNullMat[curLigand,] > pccCenteredAll[kk])
}

rafalib::mypar(mfrow=c(1,2))

plot(pccCenteredHBC, pccCenteredAll, pch=16, cex=2/3)
plot(x=rowMeans(cbind(pccCenteredHBC, pccCenteredAll)),
     y=pccCenteredHBC - pccCenteredAll, 
     pch=16, cex=2/3) ; abline(h=0, lty=2, col="red")

# there is a big difference in p-value because there is a general mean shift
# in the PCC between both analyses...
hist(pvalCenteredAll)
hist(pvalCenteredHBC)